*diff in diff analysis 
*last modified: 12 May 2025
*last modified by: Charlotte Ambrozek
*this do file implements the eventstudyinteract package to estimate individual 2x2 doubly robust did estimates and aggregate to obtain an att 

*-------------------------------------------------------------------------------
*--- Preamble
*-------------------------------------------------------------------------------

clear all
set more off
set rmsg on

set maxvar 12000
set emptycells drop

*ssc install reghdfe, replace
*ssc install avar, replace
*ssc install eventstudyinteract, replace
*ssc install erepost, replace

*-------------------------------------------------------------------------------
*--- Directories and Log 
*-------------------------------------------------------------------------------

local data_dir ./data/cleaned
local raw_dir ./data/raw
local out_dir ./analysis/output
local graph_dir ./analysis/output/graphs
local tab_dir ./analysis/output/tables
local log_dir ./documentation/logs
local date: display %tdYY-NN-DD date(c(current_date), "DMY")
di "`date'"
capture log close

log using `log_dir'/ewic_sa_tip`date', replace

mkf allefsy
cwf allefsy
use `data_dir'/allefsy_panel_naics.dta, clear

mkf efsy 
cwf efsy
use `data_dir'/efsy_panel_naics.dta, clear

*-------------------------------------------------------------------------------
*--- Data and Frames 
*-------------------------------------------------------------------------------

*import cleaned eWIC implementation info into own frame
*eWIC implementation variable is at county fips/fiscal year level 
*partition data into state/treatment year groups

*import cleaned ewic implementation info into own frame
mkf ewic_fy
frame ewic_fy{
	use ./data/cleaned/ewic_rollout.dta, clear
	rename year fiscalyear
	drop pre* post* 
}

mkf income
frame income{
	use ./data/cleaned/countyyearmedianincome.dta, clear
	keep if year == 2005
	drop year
	destring ct_fips, replace
}

mkf countychars
frame countychars{
	use ./data/cleaned/countystats.dta, clear
	keep if year == 2005
	drop year
	destring ct_fips, replace
}

mkf demos
frame demos{
	use ./data/cleaned/demos.dta, clear
	keep if year == 2005
	drop year
	destring ct_fips, replace
}

cwf ewic_fy
frame put ct_fips ev_year state, into(timings)

cwf timings
duplicates drop
encode state, generate(state_code)

frlink m:1 ct_fips, frame(countychars) generate(countylink)
frget medincome sharepoor shareltwic sharelt2fpl sharesnap sharecash sharecashorsnap, from(countylink)
frlink m:1 ct_fips, frame(demos) generate(demoslink)
frget total_pop under5_pop hispanic_total_pop total_black_pop region division urbanrural share_black share_hispanic share_under5, from(demoslink)
gen log_pop = log(total_pop)

*import TIP store data into own frame
mkf tip_fy
frame tip_fy{
	local data_dir ./data/cleaned
	use `data_dir'/tip_auth_sq, clear
	*drop "Direct Distribution Center" and "Home Food Delivery Contractor" types
	gen f = (vendor_type1 == 3 & (vendor_type2 == 4 | missing(vendor_type2))) | (vendor_type1 == 4 & (vendor_type2 == 3 | missing(vendor_type2)))
	bys tip_id: egen flag = mode(f)
	bys tip_id: replace vendor_type1 = vendor_type1[_n+1] if flag == 0 & f == 1
	drop if flag == 1
	drop flag f tip_year_id
	*fill in gaps in state fips using modes
	generate str_fips = string(ct_fips)
	replace str_fips = "0" + str_fips if strlen(str_fips) == 4
	generate st_fips = substr(str_fips, 1, 2)
	destring st_fips, replace 
	
	*drop Vermont and mississippi because of different regiemes prior to eWIC
	*drop NV because ewic turn on then off then on again and we don't have reliable timings
	*drop missing st_fips
	drop if st_fips == 50 | st_fips == 32 | st_fips == 28 | missing(st_fips)

	*flag stores that specialize in WIC
	gen a50 = vendor_type1 == 1 | vendor_type1 == 7 | vendor_type2 == 1 | vendor_type2 == 7
	
	*link in low income/high poverty info
	frlink m:1 ct_fips, frame(income) generate(inc_link)
	frget lowinc highpov highwicelig, from(inc_link)

	*link in controls from timings frame
	frlink m:1 ct_fips, frame(timings) generate(timings_link)
	frget total_pop under5_pop hispanic_total_pop total_black_pop region division urbanrural share_black share_hispanic share_under5 medincome sharepoor shareltwic sharelt2fpl sharesnap sharecash sharecashorsnap, from(timings_link)

	*link ewic implementation info
	frlink m:1 ct_fips fiscalyear, frame(ewic_fy) generate(ewic_link)
	frget ev_year , from(ewic_link)
	* assume treated at earliest exposure
	bys tip_id: egen ey_min = min(ev_year)
	
	* assume treated at earliest exposure
	replace ev_year = ey_min if ev_year != ey_min & !missing(ey_min)
	bys tip_id (ev_year): assert ev_year[1] == ev_year[_N]

	*assert no dups at future level of xtset 
	duplicates report tip_id fiscalyear 
	assert r(N) == r(unique_value)
	xtset tip_id fiscalyear
	compress
}

*--- a.- County Business Patterns (CBP) with Imputation & all NAICS - 99.97% match & Population +15 - 99.96% match
*--- b.- County Business Patterns (CBP) with Imputation - 99.39% match & Population +15 - 99.96% match
*--- c.- County Business Patterns (CBP) U.S. Census Bureau - 99.41% match & Population +15 - 99.96% match

foreach data in allefsy efsy cbp {
	
 cwf tip_fy
 preserve

 *Merge County Business Patterns (CBP) Database
 merge m:1 ct_fips fiscalyear using `data_dir'/`data'_panel_naics.dta,   ///
       generate(ewic_`data') 

 tab ct_fips if ewic_`data'==2 // unmatched from CBP data
 drop if ewic_`data'==2

 tab ct_fips if ewic_`data'==1 // unmatched from tip data

 *Merge County Population +15 NIH
 merge m:1 ct_fips fiscalyear using `data_dir'/population_15plus.dta, ///
       generate(ewic_pop) keepusing(pop) 

 tab ct_fips if ewic_pop==2 // unmatched from population data
 drop if ewic_pop==2

 tab ct_fips if ewic_pop==1 // unmatched from tip data

 *Employment in establishments per +15 population
 gen emp_15plus = emp/pop
 sum emp_15plus

*-------------------------------------------------------------------------------
*--- Variables for Sun & Abraham (2021)
*-------------------------------------------------------------------------------

foreach c in never last {

 cap drop timeToTreat L* F* 
 cap drop ebt
 
 *Control: never-treated units
 if ("`c'"=="never") {
  tab ev_year, m
  gen ev_year_never=ev_year
  replace ev_year_never=. if ev_year_never>2018
  gen never_year=(ev_year_never==.)
  tab never_year, m
  
  *Generate treatment
  gen ebt=(fiscalyear>=ev_year_never)
  tab ebt, m
  
  *Relative time
  gen timeToTreat=fiscalyear-ev_year_never
  tab timeToTreat, m
  
  forvalues l = 0/4 {
   gen L`l' = (timeToTreat==`l')
  }
  gen L5 = (timeToTreat>=5) 

  forvalues f = 1/4 {
   gen F`f' = (timeToTreat==-`f')
  }
  gen F5 = (timeToTreat<=-5)

  drop F1 // Baseline t=-1 
  gen F1 = 0 
  
  local never "_never"
}

*Control: last-treated unit
 if ("`c'"=="last") {
  sum ev_year
  gen last_year=(ev_year==r(max))
  tab last_year, m
  
  *Generate treatment
  gen ebt=(fiscalyear>=ev_year)
  replace ebt=. if ev_year>2018 & ev_year<2023 // Only use last-treated
  tab ebt, m
  
  *Relative time
  gen timeToTreat=fiscalyear-ev_year
  tab timeToTreat, m
  
  forvalues l = 0/4 {
   gen L`l' = (timeToTreat==`l')
  }
  gen L5 = (timeToTreat>=5) 

  forvalues f = 1/4 {
   gen F`f' = (timeToTreat==-`f')
  }
  gen F5 = (timeToTreat<=-5)

  drop F1 // Baseline t=-1 
  gen F1 = 0 
  
  local never 
 }

 local times F5 F4 F3 F2 L0 L1 L2 L3 L4 L5

*-------------------------------------------------------------------------------
*--- Estimates: 2x2 All, Chain, Independent
*-------------------------------------------------------------------------------

 cd `out_dir'

 *Samples
 cap drop all indep
 gen all=1
 gen indep=(chain==0 & chain!=.)

 lab var ebt "WIC EBT"

 foreach sample in all chain indep {

  #delimit ;
  eventstudyinteract auth ebt if `sample'==1, vce(cluster st_fips)  
                                  cohort(ev_year`never') 
				  control_cohort(`c'_year)
				  absorb(tip_id fiscalyear) 
				  covariates(emp_15plus);
  #delimit cr
  
  mat beta = e(b_iw)
  mat sigma = e(V_iw)
  erepost b=beta V=sigma, rename
  estimates save `out_dir'/sa_tip_simple_`data'_`c'_`sample'.ster, replace

  #delimit ;
  eventstudyinteract auth ebt if `sample'==1, vce(cluster st_fips)  
                                  cohort(ev_year`never') 
				  control_cohort(`c'_year)
				  absorb(tip_id fiscalyear);
  #delimit cr
  
  mat beta = e(b_iw)
  mat sigma = e(V_iw)
  erepost b=beta V=sigma, rename
  estimates save `out_dir'/sa_tip_simple_nocov_`data'_`c'_`sample'.ster, replace

 }

*-------------------------------------------------------------------------------
*--- Estimates: Event Studies All, Chain, Independent
*-------------------------------------------------------------------------------

 cd `out_dir'
  
 *Periods
 gen t_`c'=_n in 1/9
 
 foreach sample in all chain indep {
	
  #delimit ;
  eventstudyinteract auth `times' if `sample'==1, vce(cluster st_fips)  
                                  cohort(ev_year`never') 
				  control_cohort(`c'_year)
				  absorb(tip_id fiscalyear) 
				  covariates(emp_15plus);
  #delimit cr				 
	  
  mat b  = e(b_iw)
  mat V  = e(V_iw)

  mat blead = b[1,2..4]
  mat blag  = b[1,5..9]
  mata st_matrix("se",sqrt(diagonal(st_matrix("e(V_iw)"))))
  mat se = se'
  mat slead = se[1,2..4]
  mat slag  = se[1,5..9]
 
  mat beta = blead,0,blag
  mat se   = slead,0,slag

  ereturn post b V
   
  *Estimates
  cap drop est_`c' lb_`c' ub_`c'
  gen est_`c'=.
  gen lb_`c'=.
  gen ub_`c'=.

  *ATT pre period
  lincom (F4 + F3 + F2)/3
  replace est_`c'=r(estimate) if t_`c'<4
  replace lb_`c'=r(lb) if t_`c'<4
  replace ub_`c'=r(ub) if t_`c'<4

  *ATT post period
  lincom (L0 + L1 + L2 + L3 + L4)/5
  replace est_`c'=r(estimate) if t_`c'>=5 & t_`c'!=.
  replace lb_`c'=r(lb) if t_`c'>=5 & t_`c'!=.
  replace ub_`c'=r(ub) if t_`c'>=5 & t_`c'!=.

*-------------------------------------------------------------------------------
*--- Export Results: Figure
*-------------------------------------------------------------------------------

  *ssc install coefplot, replace
  *ssc install blindschemes, replace 
  set scheme plotplainblind
  graph set window fontface "Times New Roman"
 
  if ("`sample'"=="all")   local color1 black
  if ("`sample'"=="chain") local color1 emerald
  if ("`sample'"=="indep") local color1 orange
 
  if ("`sample'"=="all")   local msym O
  if ("`sample'"=="chain") local msym O
  if ("`sample'"=="indep") local msym T
  
  summ est_`c' if t_`c'<4, meanonly
  local est_pre = round(r(mean), 0.001)
  summ lb_`c' if t_`c'<4, meanonly
  local lb_pre = round(r(mean), 0.001)
  summ ub_`c' if t_`c'<4, meanonly
  local ub_pre = round(r(mean), 0.001)
  
  summ est_`c' if t_`c'>=5, meanonly
  local est_post = round(r(mean), 0.001)
  summ lb_`c' if t_`c'>=5, meanonly
  local lb_post = round(r(mean), 0.001)
  summ ub_`c' if t_`c'>=5, meanonly
  local ub_post = round(r(mean), 0.001)
  
  #delimit ;
  coefplot matrix(beta), se(se) vertical legend(off)                      
           ytitle("ATT estimate") xtitle("Event Year")          
           xlabel(1 "-4" 2 "-3" 3 "-2" 4 "-1" 5 "0" 6 "1" 
	   7 "2" 8 "3" 9 "4") 
 	   yline(0, lcolor(black) lpattern(dash)) 
           xline(5, lcolor(red) lpattern(dot) lwidth(medthick)) 
           ciopts(recast(rcap) msize(vtiny) lcolor(`color1')) 
	   recast(scatter) msymbol(`msym') mcolor(`color1') msize(medlarge)
           addplot(rarea ub_`c' lb_`c' t_`c' if t_`c'<4, color(gs9%50) 
	   lcolor(`color1') ||
	   rarea ub_`c' lb_`c' t_`c' if t_`c'>=5, color(gs9%50) 
           lcolor(`color1') ||
	   line est_`c' t_`c' if t_`c'<4, lcolor(gs9) lpattern(solid) 
	   lcolor(`color1') ||
	   line est_`c' t_`c' if t_`c'>=5, lcolor(gs9) lpattern(solid)  
	   lcolor(`color1') below) note("ATT estimate for the pre period is `est_pre' with a 95% CI of (`lb_pre', `ub_pre')." "ATT estimate for the post period is `est_post' with a 95% CI of (`lb_post', `ub_post').");
  #delimit cr

  graph export "`graph_dir'/sa_tip_`data'_`c'_`sample'.png", replace
  
 }
}
 restore
}

log close
